He’s going to push his file to GitHub for us to reference, so we don’t need to copy it.
Start with a ggplot, then add data and an aesthetic mapping, geometric objects, scales, facet specifications, statistical transformations, and a coordinate system.
# Load the gapminder data set and inspect its structure.
data(gapminder)
# Expect 6 variables: 2 factors (country, continent), an integer year,
# life expectancy, population, and GDP per capita.
str(gapminder)
## Classes 'tbl_df', 'tbl' and 'data.frame': 1704 obs. of 6 variables:
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num 28.8 30.3 32 34 36.1 ...
## $ pop : int 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num 779 821 853 836 740 ...
# Writing `ggplot2::` in front of a function references the package without
# attaching it through library().

# Population by year; data and aesthetics are declared on the plot itself.
ggplot(gapminder, aes(year, pop)) +
  geom_point() # add a layer of points

# Same plot, but the data and mapping live on the layer. This form is useful
# when layering two data sets, e.g. smaller subsets of the data per layer.
ggplot() +
  geom_point(aes(year, pop), data = gapminder)

# Layer-level mapping again, now colouring the points by continent.
ggplot() +
  geom_point(aes(year, pop, colour = continent), data = gapminder)

# Equivalent plot with the coloured mapping declared at the plot level.
ggplot(gapminder, aes(year, pop, colour = continent)) +
  geom_point()
# Aggregate population by continent and year.
# `pop` is stored as an integer, so summing it directly overflows for the
# largest continent totals: sum() returns NA with an "integer overflow"
# warning and ggplot then drops those rows. Converting to double before
# summing keeps every continent/year total in the plot.
gapminder %>%
  group_by(year, continent) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ungroup() %>% # summarise() leaves the result grouped by year
  ggplot(data = ., mapping = aes(x = year, y = pop, color = continent)) +
  # On a linear scale the largest totals swamp the smaller continents,
  # so this view is not very informative.
  geom_point()
# Same aggregation, with a log10 transformation applied to the y axis.
# `pop` is stored as an integer, so summing it directly overflows for the
# largest continent totals: sum() returns NA with an "integer overflow"
# warning and ggplot then drops those rows. Converting to double before
# summing keeps every continent/year total in the plot.
gapminder %>%
  group_by(year, continent) %>%
  summarise(pop = sum(as.numeric(pop))) %>%
  ungroup() %>% # summarise() leaves the result grouped by year
  ggplot(data = ., mapping = aes(x = year, y = pop, color = continent)) +
  geom_point() +
  # The log scale makes Oceania visible, at the cost of compressing the
  # differences among the larger continents.
  scale_y_log10()
# Open the table in the data viewer, then print per-column summary statistics.
view(gapminder)
summary(object = gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# GDP per capita over time, coloured by country, one panel per continent.
# facet_grid() facets on two variables; facet_wrap() on a single one.
ggplot(gapminder, aes(year, gdpPercap, colour = country)) +
  geom_point() +
  facet_wrap(~ continent)

# With every country in the legend that plot gets out of hand, so restrict
# the data to Asia before plotting.
ggplot(
  filter(gapminder, continent == "Asia"),
  aes(year, gdpPercap, colour = country)
) +
  geom_point() +
  facet_wrap(~ continent)
# Plot two numeric variables against each other: GDP per capita and life
# expectancy. There is a relationship, but its shape is odd on a linear
# x axis, so try a log scale next.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point()

# A log10 x axis is much easier to read: a good example of when to
# transform a scale.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point() +
  scale_x_log10()

# Colour by continent (better still) and add a smoother. The default used
# here is loess (local regression); it can be changed via `method`.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10() +
  stat_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Replace the default smoother with a linear model: life expectancy
# regressed on (log-scaled) GDP per capita, fitted per continent.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10() +
  stat_smooth(method = "lm")

# The graph can be made even busier: here point size is also mapped to
# population, which makes the plot harder to read.
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent, size = pop)) +
  geom_point() +
  scale_x_log10() +
  stat_smooth(method = "lm")

# Asia only: GDP per capita over time, coloured by country.
# facet_grid() facets on two variables; facet_wrap() on a single one.
ggplot(
  filter(gapminder, continent == "Asia"),
  aes(year, gdpPercap, colour = country)
) +
  geom_point() +
  facet_wrap(~ continent)
# Change the mapping: average each European country over all years, then
# map country to the point shape.
gapminder %>%
  filter(continent == "Europe") %>%
  group_by(country) %>%
  summarise(
    gdpPercap = mean(gdpPercap),
    lifeExp = mean(lifeExp)
  ) %>%
  ggplot(aes(gdpPercap, lifeExp, shape = country)) +
  geom_point() +
  scale_x_log10() +
  stat_smooth(method = "lm")
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 30. Consider specifying shapes manually if you must have them.
## Warning: Removed 24 rows containing missing values (geom_point).
# Not really useful, but we can see that there are multiple shapes.
# Map continent to `fill` instead of `colour`.
ggplot(gapminder, aes(gdpPercap, lifeExp, fill = continent)) +
  geom_point() +
  scale_x_log10() +
  stat_smooth(method = "lm")

# To get rid of the points, simply omit geom_point().
ggplot(gapminder, aes(gdpPercap, lifeExp, colour = continent)) +
  scale_x_log10() +
  stat_smooth(method = "lm")

# Map continent to line type. The dashed patterns are assigned
# automatically and are hard to tell apart, so colour is added on the
# point layer only.
ggplot(gapminder, aes(gdpPercap, lifeExp, linetype = continent)) +
  geom_point(aes(colour = continent)) +
  scale_x_log10() +
  stat_smooth(method = "lm")
# Collapse to a single fitted line: continent is mapped only on the point
# layer, so the smoother is no longer split into one line per continent.
# Inside aes(), alpha is mapped to a variable (here population), so the
# transparency varies point by point.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(aes(colour = continent, alpha = pop)) +
  scale_x_log10() +
  stat_smooth(method = "lm")

# Outside aes(), alpha is a fixed value between 0 and 1 applied to every
# point. The smoother gets its own colour mapping, giving one line per
# continent again.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(aes(colour = continent), alpha = 0.5) +
  scale_x_log10() +
  stat_smooth(aes(colour = continent), method = "lm")

# Faceting by continent breaks the scatter plot into panels and shows the
# relationship within each one.
ggplot(gapminder, aes(gdpPercap, lifeExp)) +
  geom_point(alpha = 0.4) +
  scale_x_log10() +
  stat_smooth(method = "lm") +
  facet_wrap(~ continent)
# Faceted scatter plot with axis labels, a title, and a subtitle.
# The subtitle previously read "Faceted by Continet"; the spelling is fixed
# because this text is rendered on the plot.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = .4) +
  scale_x_log10() +
  stat_smooth(method = lm) +
  facet_wrap(~continent) +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy",
    title = "Life Expectancy by GDP Per Capita",
    subtitle = "Faceted by Continent"
  )

# Getting weird again: the same plot with the axes flipped.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(alpha = .4) +
  scale_x_log10() +
  stat_smooth(method = lm) +
  facet_wrap(~continent) +
  labs(
    x = "GDP Per Capita",
    y = "Life Expectancy",
    title = "Life Expectancy by GDP Per Capita",
    subtitle = "Faceted by Continent"
  ) +
  coord_flip() # swap the x and y axes
# Box plots of life expectancy per continent; readable, but flipping the
# axes makes them a little easier to read.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_boxplot()

# Flipped version: definitely easier to read.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_boxplot() +
  coord_flip()

# theme_minimal() takes out the grey background; theme_classic() would
# remove all of the grid lines as well.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_boxplot() +
  coord_flip() +
  theme_minimal()

# Adding the raw points is a bit much, but it shows a little more of the
# data; this could be tuned further if wanted.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_point() +
  geom_boxplot() +
  coord_flip() +
  theme_minimal()

# Same layers with the FiveThirtyEight theme.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_point() +
  geom_boxplot() +
  coord_flip() +
  theme_fivethirtyeight()
# Wes Anderson themes exist too; plots can be given different themes,
# colours, etc.

# Summarise each continent by its median life expectancy with a min-max
# range, then draw the raw points on top.
# NOTE(review): fun.y / fun.ymin / fun.ymax are deprecated from ggplot2 3.3.0
# in favour of fun / fun.min / fun.max -- switch once the installed ggplot2
# version is confirmed.
ggplot(gapminder, aes(continent, lifeExp)) +
  stat_summary(
    fun.y = median,
    fun.ymin = min,
    fun.ymax = max,
    alpha = 0.9
  ) +
  geom_point() +
  coord_flip() +
  theme_fivethirtyeight()
# Another useful option is the position adjustment: identity, fill, dodge, or jitter. If all you had was the min, median, and max, you could still plot them: pass them in directly, or create some fake data around those values and plot that if you wanted.